Purpose: Collate EcoDrought streamflow and temperature data, provided by EcoD PIs/partners and NWIS

Notes:

2.1 Gather site information

Code
# West Brook: site metadata provided by the Massachusetts EcoD partners
siteinfo_wb <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Mass/MA_site_info.csv") %>%
  mutate(Station_No = factor(Station_No), Site_Name = factor(Site_Name)) %>%
  # standardize column names so all regional site tables can be row-bound later
  rename(site_id = Site_ID, station_no = Station_No, site_name = Site_Name, lat = Latitude_dec_deg, long = Longitude_dec_deg, elev_ft = Elevation_ft, area_sqmi = Drainage_Area_sqmi) %>%
  # every MA site in this file is a small ("little") gage in the West Brook basin
  mutate(designation = "little", basin = "West Brook", region = "Mass") #%>% select(-c(elev_ft, area_sqmi)) 

# Shenandoah: site metadata provided by the Virginia EcoD partners
siteinfo_shen <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Virg/VA_site_info.csv") %>%
  mutate(Station_No = factor(Station_No), Site_Name = factor(Site_Name)) %>%
  rename(site_id = Site_ID, station_no = Station_No, site_name = Site_Name, lat = Latitude_dec_deg, long = Longitude_dec_deg, elev_ft = Elevation_ft, area_sqmi = Drainage_Area_sqmi) %>%
  # sites whose ID contains "10FL" are designated "big"; basin name is the
  # site name minus its trailing 3-character site number (e.g. "Paine Run 01" -> "Paine Run")
  mutate(designation = ifelse(str_detect(site_id, "10FL"), "big", "little"), 
         basin = str_sub(site_name, 1, str_length(site_name)-3), region = "Shen") #%>% select(-c(elev_ft, area_sqmi))

# Flathead/Muhlfeld: the provider's "basin" column is really the subbasin;
# all of these sites roll up into the Flathead basin
siteinfo_flat <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Flathead/Flathead_SiteInfo_UpdateOct25.csv") %>% 
  select(basin, site_name, site_id, region, designation, lat, long) %>%
  rename(subbasin = basin) %>%
  mutate(basin = "Flathead", region = "Flat") %>%
  select(site_id, site_name, lat, long, designation, basin, subbasin, region) %>%
  # keep only the small gages here — the larger Flathead gages come from NWIS below
  filter(designation == "little")
  
# GYA/Al-Chokhachy: Greater Yellowstone sites, split into Snake vs Shields regions
siteinfo_gya <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Al-Chokhachy/Al-Chokhachy_sites.csv") %>%
  # region is derived from basin; all provided GYA sites are small ("little") gages
  mutate(region = ifelse(basin == "Snake River", "Snake", "Shields"), 
         designation = "little") %>%
  select(site_id, site_name, latitude, longitude, designation, basin, region) %>% 
  rename(lat = latitude, long = longitude)
  

# NWIS Medium/Big/Super G gage station numbers (USGS site numbers; the long
# 15-digit codes are lat/long-derived site IDs for the Oregon sites)
sites <- c("01169900", # South River at Conway, Massachusetts
           "13011500", # Pacific Creek, Snake River, Wyoming
           "06195600", # Shields River at Livingston, Montana
           "12355500", # North Fork Flathead River, Montana
           "10396000", # Donner Blitzen River near Frenchglen, Oregon
           # Medium G
           "12355347", # Big Creek (Flathead)
           "12355342", # Hallowat Creek (Flathead)
           "06192980", # Shields River above Smith Creek (GYA)
           "06192900", # Dugout Creek Mouth (GYA)
           "13012475", # South Fork Spread Creek (GYA)
           "13012465", # Leidy Creek, lower (GYA)
           "01171100", # West Brook (Mass)
           "01171000",  # Avery Brook (Mass)
           "424551118503200", # Fish Creek at DB confluence (Oreg)
           "424547118503500", # DB above Fish Creek (Oreg)
           "424325118495900", # DB near Burnt Car Spring (Oreg)
           "424003118453700", # Little Blitzen River (Oreg)
           "423830118453200", # Indian Creek (Oreg)
           "423815118453900" # DB above Indian Creek (Oreg)
           )
# Pull NWIS site metadata and attach project attributes.
# The original code assigned site_name/site_id/designation/basin/region by
# POSITION, silently assuming readNWISsite() returns rows in a particular
# order. Build an explicit station_no-keyed lookup and join instead, so each
# gage always gets the correct attributes regardless of return order. Columns
# are also selected by NAME rather than fragile numeric indices.
siteinfo_nwis <- as_tibble(readNWISsite(sites)) %>%
  select(station_no = site_no, lat = dec_lat_va, long = dec_long_va,
         elev_ft = alt_va, area_sqmi = drain_area_va)

# Project attributes for each NWIS gage, keyed by station number
nwis_lookup <- tribble(
  ~station_no,       ~site_name,                                ~site_id, ~designation, ~basin,           ~region,
  "01169900",        "South River Conway NWIS",                 "SRC",    "big",        "West Brook",     "Mass",
  "01171000",        "Avery Brook NWIS",                        "AVB",    "medium",     "West Brook",     "Mass",
  "01171100",        "West Brook NWIS",                         "WBR",    "medium",     "West Brook",     "Mass",
  "06192900",        "Dugout Creek NWIS",                       "DUG",    "medium",     "Shields River",  "Shields",
  "06192980",        "Shields River ab Smith NWIS",             "SRS",    "medium",     "Shields River",  "Shields",
  "06195600",        "Shields River nr Livingston NWIS",        "SRL",    "big",        "Shields River",  "Shields",
  "10396000",        "Donner Blitzen River nr Frenchglen NWIS", "DBF",    "big",        "Donner Blitzen", "Oreg",
  "12355342",        "Hallowat Creek NWIS",                     "HAL",    "medium",     "Flathead",       "Flat",
  "12355347",        "Big Creek NWIS",                          "BIG",    "medium",     "Flathead",       "Flat",
  "12355500",        "North Fork Flathead River NWIS",          "NFF",    "big",        "Flathead",       "Flat",
  "13011500",        "Pacific Creek at Moran NWIS",             "PCM",    "big",        "Snake River",    "Snake",
  "13012465",        "Leidy Creek Mouth NWIS",                  "LEI",    "medium",     "Snake River",    "Snake",
  "13012475",        "SF Spread Creek Lower NWIS",              "SFS",    "medium",     "Snake River",    "Snake",
  "423815118453900", "Donner Blitzen ab Indian NWIS",           "DBI",    "medium",     "Donner Blitzen", "Oreg",
  "423830118453200", "Indian Creek NWIS",                       "IND",    "medium",     "Donner Blitzen", "Oreg",
  # NOTE(review): "Blizten" is a typo for "Blitzen" but is kept as-is because
  # this exact string is the site's label throughout the written data files
  "424003118453700", "Little Blizten River NWIS",               "LBL",    "medium",     "Donner Blitzen", "Oreg",
  "424325118495900", "Donner Blitzen nr Burnt Car NWIS",        "DBB",    "medium",     "Donner Blitzen", "Oreg",
  "424547118503500", "Donner Blitzen ab Fish NWIS",             "DBA",    "medium",     "Donner Blitzen", "Oreg",
  "424551118503200", "Fish Creek NWIS",                         "FSH",    "medium",     "Donner Blitzen", "Oreg"
)

siteinfo_nwis <- siteinfo_nwis %>%
  left_join(nwis_lookup, by = "station_no") %>%
  select(site_id, site_name, lat, long, station_no, designation, basin, region, elev_ft, area_sqmi)
#mapview(st_as_sf(siteinfo_nwis, coords = c("long", "lat"), crs = 4326))


# Combine all regional site tables into one, then fill in the ragged subbasin
# column: the two Big Creek NWIS gages belong to the Big Creek subbasin, and
# any site without an explicit subbasin defaults to its basin.
siteinfo <- bind_rows(siteinfo_wb, siteinfo_shen, siteinfo_flat, siteinfo_gya, siteinfo_nwis) %>%
  mutate(
    subbasin = ifelse(site_name %in% c("Hallowat Creek NWIS", "Big Creek NWIS"),
                      "Big Creek", subbasin),
    subbasin = ifelse(is.na(subbasin), basin, subbasin)
  )


# Fix Shields River Valley Ranch (SH07) coordinates: it shares the location of
# the NWIS Shields River ab Smith gage (SRS), so copy SRS's lat/long onto SH07.
siteinfo <- siteinfo %>%
  mutate(lat  = replace(lat,  site_id == "SH07", lat[site_id == "SRS"]),
         long = replace(long, site_id == "SH07", long[site_id == "SRS"]))

# Tag each site's data source: NWIS gages carry "NWIS" in their site name,
# everything else was supplied by EcoD PIs/partners.
siteinfo <- siteinfo %>%
  mutate(source = ifelse(grepl("NWIS", site_name), "NWIS", "ECOD"))

# add elevation and area variables (from watershed delineation)
# Read every per-site CSV in the delineation output folder and stack them.
areadir <- "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Watershed Delineation/Area and Elevation"
areafiles <- list.files(areadir)
# lapply() + file.path() replaces the 1:length() index loop (which misbehaves
# on an empty folder: 1:0 yields c(1, 0)) and the paste(sep = "") path building.
arealist <- lapply(file.path(areadir, areafiles), read_csv)
# bind_rows() is safer than do.call(rbind, ...): if any file has a slightly
# different column set, missing columns are filled with NA instead of erroring.
areaelev <- bind_rows(arealist)
# how well do provided and delineation area/elevation match?
# siteinfo %>% left_join(areaelev, by = "site_id") %>% ggplot() + geom_point(aes(x = area_sqmi.x, y = area_sqmi.y)) + geom_abline(intercept = 0, slope = 1) + facet_wrap(~basin, scales = "free")
# test %>% ggplot() + geom_point(aes(x = elev_ft.x, y = elev_ft.y)) + geom_abline(intercept = 0, slope = 1) + facet_wrap(~basin, scales = "free")
# add delineated variables, replacing the provider-supplied area/elevation;
# join key left implicit (all shared columns) to preserve original behavior
siteinfo <- siteinfo %>% select(-c(area_sqmi, elev_ft)) %>% left_join(areaelev)
# fix NF Flathead (no dem from Canada): the delineated area is wrong because
# the DEM does not extend into Canada, so hard-code the drainage area (sq mi)
siteinfo$area_sqmi[siteinfo$site_id == "NFF"] <- 1556

Write and re-load site information

Code
# Persist the collated site table, then re-load it so downstream code works
# from the on-disk version (re-reading also normalizes column types)
write_csv(siteinfo, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
head(siteinfo)
# A tibble: 6 × 12
  site_id site_name       lat  long station_no designation basin region subbasin
  <chr>   <chr>         <dbl> <dbl> <chr>      <chr>       <chr> <chr>  <chr>   
1 AB      Avery Brook    42.4 -72.7 01171000   little      West… Mass   West Br…
2 JB      Jimmy Brook    42.4 -72.7 01171040   little      West… Mass   West Br…
3 MB      Mitchell Bro…  42.4 -72.7 01171080   little      West… Mass   West Br…
4 OL      Obear Brook …  42.4 -72.7 01171070   little      West… Mass   West Br…
5 SD      Sanderson Br…  42.4 -72.7 01171010   little      West… Mass   West Br…
6 WL      West Brook L…  42.4 -72.7 01171090   little      West… Mass   West Br…
# ℹ 3 more variables: source <chr>, area_sqmi <dbl>, elev_ft <dbl>

View unique basin names

Code
unique(siteinfo$basin)
[1] "West Brook"     "Paine Run"      "Piney River"    "Staunton River"
[5] "Flathead"       "Duck Creek"     "Shields River"  "Snake River"   
[9] "Donner Blitzen"

View unique site names

Code
unique(siteinfo$site_name)
  [1] "Avery Brook"                            
  [2] "Jimmy Brook"                            
  [3] "Mitchell Brook"                         
  [4] "Obear Brook Lower"                      
  [5] "Sanderson Brook"                        
  [6] "West Brook Lower"                       
  [7] "West Brook Upper"                       
  [8] "West Brook Reservoir"                   
  [9] "West Whately Brook"                     
 [10] "West Brook 0"                           
 [11] "Paine Run 01"                           
 [12] "Paine Run 02"                           
 [13] "Paine Run 03"                           
 [14] "Paine Run 04"                           
 [15] "Paine Run 05"                           
 [16] "Paine Run 06"                           
 [17] "Paine Run 07"                           
 [18] "Paine Run 08"                           
 [19] "Paine Run 09"                           
 [20] "Paine Run 10"                           
 [21] "Piney River 01"                         
 [22] "Piney River 02"                         
 [23] "Piney River 03"                         
 [24] "Piney River 04"                         
 [25] "Piney River 05"                         
 [26] "Piney River 06"                         
 [27] "Piney River 08"                         
 [28] "Piney River 09"                         
 [29] "Piney River 10"                         
 [30] "Staunton River 01"                      
 [31] "Staunton River 02"                      
 [32] "Staunton River 03"                      
 [33] "Staunton River 04"                      
 [34] "Staunton River 05"                      
 [35] "Staunton River 06"                      
 [36] "Staunton River 07"                      
 [37] "Staunton River 08"                      
 [38] "Staunton River 09"                      
 [39] "Staunton River 10"                      
 [40] "BigCreekLower"                          
 [41] "BigCreekMiddle"                         
 [42] "BigCreekUpper"                          
 [43] "CoalCreekHeadwaters"                    
 [44] "CoalCreekLower"                         
 [45] "CoalCreekMiddle"                        
 [46] "CycloneCreekLower"                      
 [47] "CycloneCreekMiddle"                     
 [48] "CycloneCreekUpper"                      
 [49] "HallowattCreekLower"                    
 [50] "LangfordCreekLower"                     
 [51] "LangfordCreekUpper"                     
 [52] "McGeeCreekLower"                        
 [53] "McGeeCreekTrib"                         
 [54] "McGeeCreekUpper"                        
 [55] "MeadowCreek"                            
 [56] "NicolaCreek"                            
 [57] "CoalCreekNorth"                         
 [58] "SkookoleelCreek"                        
 [59] "WernerCreek"                            
 [60] "WoundedBuckCreek"                       
 [61] "EF Duck Creek be HF"                    
 [62] "EF Duck Creek ab HF"                    
 [63] "Henrys Fork"                            
 [64] "Brackett Creek"                         
 [65] "Buck Creek"                             
 [66] "Crandall Creek"                         
 [67] "Deep Creek"                             
 [68] "Dugout Creek"                           
 [69] "Lodgepole Creek"                        
 [70] "Shields River Valley Ranch"             
 [71] "Shields River ab Dugout"                
 [72] "Grizzly Creek"                          
 [73] "Grouse Creek"                           
 [74] "Leidy Creek Lower"                      
 [75] "Leidy Creek Upper"                      
 [76] "Leidy Creek Mouth"                      
 [77] "NF Spread Creek Lower"                  
 [78] "NF Spread Creek Upper"                  
 [79] "Rock Creek"                             
 [80] "SF Spread Creek Lower"                  
 [81] "SF Spread Creek Upper"                  
 [82] "Spread Creek Dam"                       
 [83] "South River Conway NWIS"                
 [84] "Avery Brook NWIS"                       
 [85] "West Brook NWIS"                        
 [86] "Dugout Creek NWIS"                      
 [87] "Shields River ab Smith NWIS"            
 [88] "Shields River nr Livingston NWIS"       
 [89] "Donner Blitzen River nr Frenchglen NWIS"
 [90] "Hallowat Creek NWIS"                    
 [91] "Big Creek NWIS"                         
 [92] "North Fork Flathead River NWIS"         
 [93] "Pacific Creek at Moran NWIS"            
 [94] "Leidy Creek Mouth NWIS"                 
 [95] "SF Spread Creek Lower NWIS"             
 [96] "Donner Blitzen ab Indian NWIS"          
 [97] "Indian Creek NWIS"                      
 [98] "Little Blizten River NWIS"              
 [99] "Donner Blitzen nr Burnt Car NWIS"       
[100] "Donner Blitzen ab Fish NWIS"            
[101] "Fish Creek NWIS"                        

Map sites

Code
# convert to spatial object and view on map
#| fig-cap: "Map of EcoDrought project locations"
# coordinates are WGS84 lon/lat (EPSG:4326); color points by gage designation
siteinfo_sp <- st_as_sf(siteinfo, coords = c("long", "lat"), crs = 4326)
mapview(siteinfo_sp, zcol = "designation")

2.2 Load EcoD data

Code
# West Brook: continuous gage height, discharge, and temperature records
dat_wb <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Mass/EcoDrought Continuous_MA.csv") %>%
  mutate(Station_No = factor(Station_No), Site_Name = factor(Site_Name)) %>%
  rename(station_no = Station_No, site_name = Site_Name, datetime = DateTime_EST, height = GageHeight_Hobo_ft, flow = Discharge_Hobo_cfs, tempf = WaterTemperature_HOBO_DegF) %>%
  # convert Fahrenheit to Celsius, keep only the standardized columns
  mutate(tempc = (tempf - 32)*(5/9)) %>% select(station_no, site_name, datetime, height, flow, tempc) %>%
  # attach site metadata (station_no dropped from siteinfo so the join keys on site_name)
  left_join(siteinfo %>% select(-station_no) %>% filter(source == "ECOD")) %>%
  # MA data carry no explicit reliability flags; treat all observations as reliable (1)
  mutate(DischargeReliability = as.factor(1), TempReliability = as.factor(1))
# set tz to local and convert to UTC
tz(dat_wb$datetime) <- "EST"
dat_wb$datetime <- with_tz(dat_wb$datetime, "UTC")
head(dat_wb)
# A tibble: 6 × 18
  station_no site_name   datetime            height  flow    tempc site_id   lat
  <fct>      <chr>       <dttm>               <dbl> <dbl>    <dbl> <chr>   <dbl>
1 01171000   Avery Brook 2020-02-20 05:00:00   4.17  5.37  0.00556 AB       42.4
2 01171000   Avery Brook 2020-02-20 05:15:00   4.17  5.3   0.00556 AB       42.4
3 01171000   Avery Brook 2020-02-20 05:30:00   4.16  5.17  0.00556 AB       42.4
4 01171000   Avery Brook 2020-02-20 05:45:00   4.17  5.27  0.00556 AB       42.4
5 01171000   Avery Brook 2020-02-20 06:00:00   4.17  5.3   0.00556 AB       42.4
6 01171000   Avery Brook 2020-02-20 06:15:00   4.15  4.94 -0.0556  AB       42.4
# ℹ 10 more variables: long <dbl>, designation <chr>, basin <chr>,
#   region <chr>, subbasin <chr>, source <chr>, area_sqmi <dbl>, elev_ft <dbl>,
#   DischargeReliability <fct>, TempReliability <fct>
Code
# Shenandoah: continuous records from the USGS-operated small gages
dat_shen <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Virg/EcoDrought_Continuous_VA.csv") %>%
  mutate(Station_No = factor(Station_No), Site_ID = factor(Site_ID), Discharge_Hobo_cfs = as.numeric(Discharge_Hobo_cfs)) %>%
  rename(station_no = Station_No, site_id = Site_ID, datetime = DateTime_EST, height = GageHeight_Hobo_ft, flow = Discharge_Hobo_cfs, tempf = WaterTemperature_HOBO_DegF) %>%
  # convert Fahrenheit to Celsius, keep only the standardized columns
  mutate(tempc = (tempf - 32)*(5/9)) %>% select(station_no, site_id, datetime, height, flow, tempc) %>%
  left_join(siteinfo %>% filter(source == "ECOD"))
# set tz to local and convert to UTC
tz(dat_shen$datetime) <- "EST"
dat_shen$datetime <- with_tz(dat_shen$datetime, "UTC")

# pull in Big G data separately (UVA long-term gage sites)
dat_shen_uva_q <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Virg/Shen_BigG_Discharge_hourly_UVA.csv") %>%
  rename(flow = cfs)
dat_shen_uva_t <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Virg/Shen_BigG_TempEtc_hourly_UVA.csv") %>%
  select(site_id, datetime, tempc_mean) %>% rename(tempc = tempc_mean)
# join hourly discharge with hourly temperature, then attach site metadata
dat_shen_uva <- dat_shen_uva_q %>% left_join(dat_shen_uva_t) %>% left_join(siteinfo %>% filter(source == "ECOD"))
# set tz to local and convert to UTC
tz(dat_shen_uva$datetime) <- "EST"
dat_shen_uva$datetime <- with_tz(dat_shen_uva$datetime, "UTC")

# bind usgs and uva data; no explicit reliability flags, so assume reliable (1)
dat_shen <- bind_rows(dat_shen, dat_shen_uva) %>% mutate(DischargeReliability = as.factor(1), TempReliability = as.factor(1))
head(dat_shen)
# A tibble: 6 × 18
  station_no site_id datetime            height  flow tempc site_name      lat
  <chr>      <chr>   <dttm>               <dbl> <dbl> <dbl> <chr>        <dbl>
1 0162732260 PA_01FL 2018-11-07 05:00:00   10.8  6.11  11.6 Paine Run 01  38.2
2 0162732260 PA_01FL 2018-11-07 05:30:00   10.8  6.38  11.6 Paine Run 01  38.2
3 0162732260 PA_01FL 2018-11-07 06:00:00   10.8  6.36  11.4 Paine Run 01  38.2
4 0162732260 PA_01FL 2018-11-07 06:30:00   10.8  6.17  11.4 Paine Run 01  38.2
5 0162732260 PA_01FL 2018-11-07 07:00:00   10.8  6.35  11.3 Paine Run 01  38.2
6 0162732260 PA_01FL 2018-11-07 07:30:00   10.8  6.4   11.3 Paine Run 01  38.2
# ℹ 10 more variables: long <dbl>, designation <chr>, basin <chr>,
#   region <chr>, subbasin <chr>, source <chr>, area_sqmi <dbl>, elev_ft <dbl>,
#   DischargeReliability <fct>, TempReliability <fct>
Code
# Flathead/Muhlfeld: one continuous-data CSV per site (EMA export)
flatdir <- "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Flathead/Export3/EMA/Continuous Data"
flatfiles <- list.files(flatdir)
flatlist <- list()
# seq_along() is safe for an empty folder (1:length() would yield c(1, 0));
# file.path() replaces manual paste(sep = "") path construction
for (i in seq_along(flatfiles)) { 
  flatlist[[i]] <- read_csv(file.path(flatdir, flatfiles[i])) %>%
    mutate(#DateTime = mdy_hm(DateTime, tz = "MST"),
           # derive the site name from the file name, e.g. "BigCreekLowerEMA.csv" -> "BigCreekLower"
           site_name = gsub("EMA.csv", "", flatfiles[i])) %>% 
    select(ISO.8601.UTC, GageHeightFT, DischargeCFS, TemperatureF, site_name, DischargeReliability_JB, GageReliability, TempReliability_JB) %>% rename(DischargeReliability = DischargeReliability_JB, TempReliability = TempReliability_JB)
}
dat_flat <- bind_rows(flatlist) %>%
  filter(!is.na(ISO.8601.UTC)) %>% 
  rename(datetime = ISO.8601.UTC, height = GageHeightFT, flow = DischargeCFS, tempf = TemperatureF) %>%
  # convert F to C; truncate timestamps to the minute so near-simultaneous
  # logger readings (seconds apart) collapse onto one timestamp
  mutate(tempc = (tempf - 32) * (5/9), datetime = floor_date(datetime, unit = "minute")) %>% 
  select(-tempf) %>%
  group_by(site_name, datetime) %>% 
  # average duplicate readings within a minute. NOTE(review): unique() assumes
  # the reliability flags never differ within a minute — summarise() will fail
  # if that assumption is violated; confirm with the raw exports.
  summarise(height = mean(height, na.rm = TRUE), flow = mean(flow, na.rm = TRUE), tempc = mean(tempc, na.rm = TRUE),
            DischargeReliability = unique(DischargeReliability), 
            GageReliability = unique(GageReliability), 
            TempReliability = unique(TempReliability)) %>%
  left_join(siteinfo %>% filter(source == "ECOD")) %>% ungroup() %>% 
  mutate(DischargeReliability = as_factor(DischargeReliability),
         GageReliability = as_factor(GageReliability),
         TempReliability = as_factor(TempReliability))
# timestamps are already UTC (ISO.8601.UTC); just stamp the tz attribute
tz(dat_flat$datetime) <- "UTC"
head(dat_flat)
# A tibble: 6 × 19
  site_name     datetime            height  flow tempc DischargeReliability
  <chr>         <dttm>               <dbl> <dbl> <dbl> <fct>               
1 BigCreekLower 2017-09-18 18:45:00   1.64  38.1  8.59 1                   
2 BigCreekLower 2017-09-18 19:45:00   1.65  38.7  8.69 1                   
3 BigCreekLower 2017-09-18 20:45:00   1.65  39.4  8.79 1                   
4 BigCreekLower 2017-09-18 21:45:00   1.66  39.6  8.79 1                   
5 BigCreekLower 2017-09-18 22:45:00   1.67  41.2  8.79 1                   
6 BigCreekLower 2017-09-18 23:45:00   1.67  41.3  8.69 1                   
# ℹ 13 more variables: GageReliability <fct>, TempReliability <fct>,
#   site_id <chr>, lat <dbl>, long <dbl>, station_no <chr>, designation <chr>,
#   basin <chr>, region <chr>, subbasin <chr>, source <chr>, area_sqmi <dbl>,
#   elev_ft <dbl>
Code
# Greater Yellowstone/Al-Chokhachy: one CSV per site
gyadir <- "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Al-Chokhachy/Al-Chokhachy data files"
gyafiles <- list.files(gyadir)
gyalist <- list()
# seq_along() is safe for an empty folder (1:length() would yield c(1, 0));
# file.path() replaces manual paste(sep = "") path construction
for (i in seq_along(gyafiles)) { 
  gyalist[[i]] <- read_csv(file.path(gyadir, gyafiles[i])) %>%
    mutate(date = mdy(date),
           # build a local (MST) timestamp from the separate date/time fields
           datetime = ymd_hms(paste(date, time, sep = " "), tz = "MST"),
           # 35.3147 converts m^3/s to ft^3/s
           discharge = as.numeric(discharge)*35.314666212661)
}
dat_gya <- bind_rows(gyalist) %>% select(datetime, depth, discharge, temperature, location) %>% 
  rename(height = depth, flow = discharge, tempc = temperature, site_name = location) %>%
  filter(site_name != "EF Henrys") %>% # drop weird duplicate site/year?
  # map the providers' site labels onto the standardized names in siteinfo
  mutate(site_name = dplyr::recode(site_name,
                            "EF Above Confluence" = "EF Duck Creek ab HF",
                            "EF Below Confluence" = "EF Duck Creek be HF",
                            "NF Spread Creek" = "NF Spread Creek Lower",
                            "Upper NF Spread Creek" = "NF Spread Creek Upper",
                            "SF Spread Creek" = "SF Spread Creek Lower",
                            "Upper SF Spread Creek" = "SF Spread Creek Upper",
                            "Shields River above Dugout Creek" = "Shields River ab Dugout",
                            "Upper Leidy Creek" = "Leidy Creek Upper", 
                            "Leidy Creek" = "Leidy Creek Mouth",
                            "Spread Creek" = "Spread Creek Dam",
                            "Shields River above Smith Creek" = "Shields River Valley Ranch")) %>%
  # tempc <= 100 drops implausible temperature values (presumably logger errors);
  # no explicit reliability flags, so assume reliable (1)
  left_join(siteinfo %>% filter(source == "ECOD")) %>% filter(tempc <= 100) %>%
  mutate(DischargeReliability = as.factor(1), TempReliability = as.factor(1))
# set tz to local and convert to UTC
tz(dat_gya$datetime) <- "MST"
dat_gya$datetime <- with_tz(dat_gya$datetime, "UTC")
head(dat_gya)
# A tibble: 6 × 18
  datetime            height  flow tempc site_name      site_id   lat  long
  <dttm>               <dbl> <dbl> <dbl> <chr>          <chr>   <dbl> <dbl>
1 2023-06-18 17:52:00  0.539  28.2  7.38 Brackett Creek SH01     45.9 -111.
2 2023-06-18 18:52:00  0.595  38.4  7.68 Brackett Creek SH01     45.9 -111.
3 2023-06-18 19:52:00  0.63   46.7  7.88 Brackett Creek SH01     45.9 -111.
4 2023-06-18 20:52:00  0.663  56.0  7.98 Brackett Creek SH01     45.9 -111.
5 2023-06-18 21:52:00  0.74   85.8  7.98 Brackett Creek SH01     45.9 -111.
6 2023-06-18 22:52:00  0.771 102.   8.28 Brackett Creek SH01     45.9 -111.
# ℹ 10 more variables: station_no <chr>, designation <chr>, basin <chr>,
#   region <chr>, subbasin <chr>, source <chr>, area_sqmi <dbl>, elev_ft <dbl>,
#   DischargeReliability <fct>, TempReliability <fct>

Bind EcoD hourly flow/temp data with siteinfo and write to file

Code
# stack all EcoD regional datasets into one sub-daily flow/temp table
dat <- bind_rows(dat_wb, dat_shen, dat_flat, dat_gya)

Check for duplicates

Code
# flag site/datetime combinations that appear more than once
dups <- dat %>% group_by(site_name, datetime) %>% filter(n()>1) %>% arrange(site_name, datetime)
# each duplicated pair contributes two rows, so halve the row count
nrow(dups)/2
[1] 9007.5
Code
# which sites contribute duplicate records?
unique(dups$site_name)
[1] "EF Duck Creek ab HF"   "EF Duck Creek be HF"   "Grouse Creek"         
[4] "Henrys Fork"           "Leidy Creek Mouth"     "NF Spread Creek Upper"
[7] "Piney River 10"        "Rock Creek"            "Staunton River 10"    

Write to file

Code
# write raw EcoD flow/temp data to file and re-load the on-disk version
write_csv(dat, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw.csv")
dat <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw.csv")

Check unique designations

Code
# sanity check: distinct gage designations present in the EcoD data
unique(dat$designation)
[1] "little" "big"   

Check unique regions

Code
# sanity check: distinct regions present in the EcoD data
unique(dat$region)
[1] "Mass"    "Shen"    "Flat"    "Shields" "Snake"  

Check unique basins

Code
# sanity check: distinct basins present in the EcoD data
unique(dat$basin)
[1] "West Brook"     "Paine Run"      "Staunton River" "Piney River"   
[5] "Flathead"       "Shields River"  "Duck Creek"     "Snake River"   

Check unique subbasins

Code
# sanity check: distinct subbasins present in the EcoD data
unique(dat$subbasin)
 [1] "West Brook"         "Paine Run"          "Staunton River"    
 [4] "Piney River"        "Big Creek"          "Coal Creek"        
 [7] "McGee Creek"        "Wounded Buck Creek" "Shields River"     
[10] "Duck Creek"         "Snake River"       

2.2.1 Inspect raw data

2.2.1.1 West Brook

Join data update

Code
# Append the Dec 2024 West Brook update: a wide export with one discharge
# column per station, reshaped to long format before binding
dat_wb_new <- dat_wb %>%
  bind_rows(read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Mass/EcoDrought Continuous_MA_updateDec2024.csv") %>% 
            rename(datetime = TimeStamp) %>%
            # NOTE(review): gather() is superseded by pivot_longer(); left as-is
            # because the two produce different row orders
            gather(key = "station_no", value = "flow", 2:9) %>% 
            mutate(datetime = as_datetime(datetime),
                   # strip the "...Discharge@" prefix, leaving just the station number
                   station_no = factor(sub(".*Discharge@", "", station_no))) %>% 
            left_join(siteinfo %>% mutate(station_no = factor(station_no))))

2.2.1.2 Flathead

Code
# Flathead site names as a plain character vector (for plotting/inspection)
mysites <- unlist(unique(dat %>% filter(basin == "Flathead") %>% select(site_name)))

2.2.1.3 Spread Creek

Code
# Snake River (Spread Creek) site names as a plain character vector
mysites <- unlist(unique(dat %>% filter(basin == "Snake River") %>% select(site_name)))

2.3 Load NWIS data

2.3.1 Download NWIS

Download sub-daily flow and temp data from NWIS

Code
# Download sub-daily (instantaneous, service = "uv") temperature (00010) and
# discharge (00060) records for each NWIS gage, one site at a time
nwis_list <- list()
for (i in seq_along(sites)) {  # seq_along() is safe if `sites` is empty
  nwis_list[[i]] <- as_tibble(readNWISdata(sites = sites[i], service = "uv", parameterCd = c("00010", "00060"), 
                                           startDate = "1980-01-01", endDate = Sys.Date(), tz = "UTC"))
  print(i)  # progress indicator — downloads are slow
}
nwis_subdaily <- bind_rows(nwis_list)
nwis_subdaily <- nwis_subdaily %>% select(-1)  # drop the leading agency-code column
# NOTE(review): positional renaming assumes a fixed column order from
# readNWISdata() — verify if parameter codes or dataRetrieval versions change
names(nwis_subdaily) <- c("station_no", "datetime", "flowcfs", "flowcfs_appcd", "tz", "tempc", "tempc_appcd")
nwis_subdaily <- nwis_subdaily %>% left_join(siteinfo %>% filter(source == "NWIS"))
write_csv(nwis_subdaily, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_NWIS_FlowTempData_Raw_SubDaily.csv")
Code
# Re-load the cached NWIS download; normalize two inconsistent site names,
# then keep only records with NWIS-tagged names
nwis_subdaily <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_NWIS_FlowTempData_Raw_SubDaily.csv") %>% 
  mutate(site_name = dplyr::recode(site_name, "Fish Creek" = "Fish Creek NWIS", "Avery Broook NWIS" = "Avery Brook NWIS")) %>% filter(grepl("NWIS", site_name))

# timestamps were downloaded in UTC; re-stamp the tz attribute after the CSV round-trip
tz(nwis_subdaily$datetime) <- "UTC"
nwis_subdaily$datetime <- with_tz(nwis_subdaily$datetime, "UTC")

head(nwis_subdaily)
# A tibble: 6 × 17
  station_no datetime            flowcfs flowcfs_appcd tz    tempc tempc_appcd
  <chr>      <dttm>                <dbl> <chr>         <chr> <dbl> <chr>      
1 01169900   1990-10-01 04:15:00      15 A [91]        UTC      NA <NA>       
2 01169900   1990-10-01 04:30:00      16 A [91]        UTC      NA <NA>       
3 01169900   1990-10-01 04:45:00      16 A [91]        UTC      NA <NA>       
4 01169900   1990-10-01 05:00:00      16 A [91]        UTC      NA <NA>       
5 01169900   1990-10-01 05:15:00      16 A [91]        UTC      NA <NA>       
6 01169900   1990-10-01 05:30:00      16 A [91]        UTC      NA <NA>       
# ℹ 10 more variables: site_id <chr>, site_name <chr>, lat <dbl>, long <dbl>,
#   designation <chr>, basin <chr>, region <chr>, subbasin <chr>,
#   area_sqmi <dbl>, elev_ft <dbl>

2.3.2 Check approval

  • A, A[91], A[92], A[93]: approved and historical daily values ~match observed data
  • A e: approved estimated data, often during ice affected periods (smoothed data)
  • P: provisional data yet to be approved, typically these are just more recent observations
  • P e: provisional estimated data, often during ice affected periods
  • P Ice: ice affected observations
  • P dis: provisional data yet to be approved, site has been discontinued
  • NA: missing value, usually b/c temp is observed but flow is not
Code
# distinct NWIS approval codes present in the flow record (see legend above)
unique(nwis_subdaily$flowcfs_appcd)
 [1] "A [91]" "A [92]" "A [93]" "A"      "A e"    "P"      "P e"    "P Ice" 
 [9] NA       "P Dis" 

Check approval for key sites:

2.3.3 View data

Code
# dat_superg_nwis <- nwis_subdaily %>%
#   filter(flowcfs_appcd %in% c("A [91]", "A [92]", "A [93]", "A")) %>%
#   mutate(date = as_date(datetime)) %>%
#   group_by(date, station_no, site_id, site_name, lat, long, designation, basin, region, subbasin, area_sqmi, elev_ft) %>%
#   summarise(flow_mean = mean(flowcfs, na.rm = TRUE), flow_min = min(flowcfs, na.rm = TRUE), flow_max = max(flowcfs, na.rm = TRUE),
#             tempc_mean = mean(tempc, na.rm = TRUE), tempc_min = min(tempc, na.rm = TRUE), tempc_max = max(tempc, na.rm = TRUE)) %>%
#   ungroup()
# head(dat_superg_nwis)
Code
# sub-daily hydrographs for the Big G NWIS gages
nwis_subdaily %>% filter(designation == "big") %>% ggplot() + geom_line(aes(x = datetime, y = flowcfs)) + facet_wrap(~site_name, scales = "free_y")

Stream flow (cfs) for Big G NWIS gages
Code
# sub-daily hydrographs for the Medium G NWIS gages
nwis_subdaily %>% filter(designation == "medium") %>% ggplot() + geom_line(aes(x = datetime, y = flowcfs)) + facet_wrap(~site_name, scales = "free_y" )

Stream flow (cfs) for Medium G NWIS gages

2.4 Bind EcoD and NWIS

Bind EcoD and NWIS data and specify common reliability/approval codes. Also round date-times to the nearest minute and summarize flow/temp data, because loggers at some sites (e.g., in the Flathead) take multiple readings within seconds of each other.

Code
# Combine EcoD and NWIS records and derive numeric 0/1 reliability flags.
dat_full <- bind_rows(dat %>% mutate(tz = tz(datetime)), nwis_subdaily %>% rename(flow = flowcfs)) %>%
  mutate(
    # BUG FIX: ifelse() on a factor returns the underlying integer level
    # codes, not the "0"/"1" label values — convert via as.character() first
    # so the flag values survive intact.
    DischargeReliability = as.numeric(as.character(DischargeReliability)),
    TempReliability = as.numeric(as.character(TempReliability)),
    # NWIS rows have no EcoD flag; treat approved observations as reliable
    DischargeReliability = ifelse(!is.na(DischargeReliability), DischargeReliability, 
                                  ifelse(flowcfs_appcd %in% c("A [91]", "A [92]", "A [93]", "A"), 1, 0)),
    # BUG FIX: temperature reliability previously keyed off the *flow*
    # approval code (flowcfs_appcd); use the temperature code instead
    TempReliability = ifelse(!is.na(TempReliability), TempReliability, 
                             ifelse(tempc_appcd %in% c("A"), 1, 0))
  )
head(dat_full)
# A tibble: 6 × 22
  station_no site_name   datetime            height  flow    tempc site_id   lat
  <chr>      <chr>       <dttm>               <dbl> <dbl>    <dbl> <chr>   <dbl>
1 01171000   Avery Brook 2020-02-20 05:00:00   4.17  5.37  0.00556 AB       42.4
2 01171000   Avery Brook 2020-02-20 05:15:00   4.17  5.3   0.00556 AB       42.4
3 01171000   Avery Brook 2020-02-20 05:30:00   4.16  5.17  0.00556 AB       42.4
4 01171000   Avery Brook 2020-02-20 05:45:00   4.17  5.27  0.00556 AB       42.4
5 01171000   Avery Brook 2020-02-20 06:00:00   4.17  5.3   0.00556 AB       42.4
6 01171000   Avery Brook 2020-02-20 06:15:00   4.15  4.94 -0.0556  AB       42.4
# ℹ 14 more variables: long <dbl>, designation <chr>, basin <chr>,
#   region <chr>, subbasin <chr>, source <chr>, area_sqmi <dbl>, elev_ft <dbl>,
#   DischargeReliability <dbl>, TempReliability <dbl>, GageReliability <dbl>,
#   tz <chr>, flowcfs_appcd <chr>, tempc_appcd <chr>

Check for duplicates

Code
# Flag sub-daily records that share a site_name/datetime pair (duplicates)
dups <- dat_full %>%
  group_by(site_name, datetime) %>%
  filter(n() > 1) %>%
  arrange(site_name, datetime)
# Halve the row count to estimate duplicate pairs (assumes pairs, not triples)
nrow(dups) / 2
[1] 9007.5
Code
# Which sites contain duplicated timestamps?
unique(dups$site_name)
[1] "EF Duck Creek ab HF"   "EF Duck Creek be HF"   "Grouse Creek"         
[4] "Henrys Fork"           "Leidy Creek Mouth"     "NF Spread Creek Upper"
[7] "Piney River 10"        "Rock Creek"            "Staunton River 10"    

Write data to file

Code
# Persist the combined raw (sub-daily) EcoD + NWIS record to disk
write_csv(dat_full, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw_ECODandNWIS.csv")

Get daily summaries for DischargeReliability == 1 only! Duplicate Shields River Livingston data for Duck Creek big G.

Code
# Daily summaries: mask unreliable flow/temp readings to NA, then compute
# per-site daily mean/min/max. Reliability flags are carried through as the
# daily maximum (1 if any reading that day was reliable).
dat_daily <- dat_full %>% 
  mutate(date = as_date(datetime),
         flow = ifelse(DischargeReliability == 1, flow, NA),
         tempc = ifelse(TempReliability == 1, tempc, NA)) %>% 
  group_by(station_no, site_name, site_id, basin, subbasin, region, lat, long, elev_ft, area_sqmi, designation, date) %>% 
  summarize(DischargeReliability = max(DischargeReliability),
            TempReliability = max(TempReliability),
            flow_mean = mean(flow), flow_min = min(flow), flow_max = max(flow),
            tempc_mean = mean(tempc), tempc_min = min(tempc), tempc_max = max(tempc),
            .groups = "drop") %>%
  arrange(region, basin, site_name, date)
# Duplicate Shields River Livingston (SRL) into the Duck Creek sub-basin so
# Duck Creek has a big-G reference gage.
dat_daily <- bind_rows(dat_daily,
                       dat_daily %>% filter(site_id == "SRL") %>% mutate(subbasin = "Duck Creek"))
head(dat_daily)
# A tibble: 6 × 20
  station_no site_name      site_id basin    subbasin region   lat  long elev_ft
  <chr>      <chr>          <chr>   <chr>    <chr>    <chr>  <dbl> <dbl>   <dbl>
1 12355347   Big Creek NWIS BIG     Flathead Big Cre… Flat    48.6 -114.   3528.
2 12355347   Big Creek NWIS BIG     Flathead Big Cre… Flat    48.6 -114.   3528.
3 12355347   Big Creek NWIS BIG     Flathead Big Cre… Flat    48.6 -114.   3528.
4 12355347   Big Creek NWIS BIG     Flathead Big Cre… Flat    48.6 -114.   3528.
5 12355347   Big Creek NWIS BIG     Flathead Big Cre… Flat    48.6 -114.   3528.
6 12355347   Big Creek NWIS BIG     Flathead Big Cre… Flat    48.6 -114.   3528.
# ℹ 11 more variables: area_sqmi <dbl>, designation <chr>, date <date>,
#   DischargeReliability <dbl>, TempReliability <dbl>, flow_mean <dbl>,
#   flow_min <dbl>, flow_max <dbl>, tempc_mean <dbl>, tempc_min <dbl>,
#   tempc_max <dbl>

Add missing dates

Code
# Insert explicit NA rows for calendar dates missing within each site's record
dat_daily <- fill_missing_dates(dat_daily, dates = date, groups = site_name)

View daily mean time series data, for example, site_name = CoalCreekLower. Notice the many short gaps in the daily data.

Code
# Interactive time series of daily mean flow for one example site (CoalCreekLower)
dat_daily %>% filter(site_name == "CoalCreekLower") %>% select(date, flow_mean) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Mean daily flow (cfs)") %>% dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Check for duplicates

Code
# Flag daily records duplicated within a site (same site_name and date)
dups <- dat_daily %>%
  group_by(site_name, date) %>%
  filter(n() > 1) %>%
  arrange(site_name, date)
# Halve the row count to estimate duplicate pairs (assumes pairs, not triples)
nrow(dups) / 2
[1] 10085
Code
# Which sites have duplicated dates? (Expected: only the intentionally
# duplicated Shields River Livingston big-G record — confirm below.)
unique(dups$site_name)
[1] "Shields River nr Livingston NWIS"

2.5 Interpolate missing data

Small periods of missing data (<24 hours) become a problem when aggregating at the daily and weekly time scales. Note that this currently discovers and fills missing data at the daily time scale, but it should be changed to interpolate at the original time scale of the raw data (e.g., hourly).

Code
# Explore data gaps: for each site, measure runs of consecutive missing
# daily mean flows via run-length encoding of the NA mask.
mysites <- unique(dat_daily$site_name)
mynas <- vector("list", length(mysites))   # preallocate, one tibble per site
for (i in seq_along(mysites)) {
  mydisch <- dat_daily$flow_mean[dat_daily$site_name == mysites[i]]
  runsna <- rle(is.na(mydisch))
  # keep only lengths of NA runs, i.e., gap lengths in days
  mynas[[i]] <- tibble(site_name = mysites[i], run = runsna$lengths[runsna$values])
}
mynas <- bind_rows(mynas)

Most gaps are relatively short

Code
# Histogram of gap lengths (days) across all sites
mynas %>%
  ggplot(aes(x = run)) +
  geom_histogram() +
  xlab("Days") +
  ylab("Frequency")

Distribution of lengths of missing data (days)

Zoomed in…

Code
# Same histogram, x-axis truncated to gaps of 30 days or fewer
mynas %>%
  ggplot(aes(x = run)) +
  geom_histogram() +
  xlab("Days") +
  ylab("Frequency") +
  xlim(0, 30)

Distribution of lengths of missing data (days)

Considering just the short gaps, which are likely most commonly a function of ice effects, which sites are problematic?

Code
# Frequency of short gaps (<= 40 days) by site
mynas %>%
  filter(run <= 40) %>%
  ggplot(aes(x = site_name)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Frequency of short (<40 days) data gaps by site

Fill short gaps (<=7 days) using time series interpolation

Code
# Fill short gaps (<= 7 days, max.fill) in daily mean flow per site using
# fillMissing(); longer gaps remain NA. (span = 100 controls the fitting
# window — see smwrBase::fillMissing for details.)
datalist <- vector("list", length(mysites))   # preallocate
for (i in seq_along(mysites)) {
  datalist[[i]] <- dat_daily %>%
    filter(site_name == mysites[i]) %>%
    mutate(flow_mean_filled = fillMissing(flow_mean, max.fill = 7, span = 100))
}
dat_daily_fill <- bind_rows(datalist)

Explore the interpolated/filled time series relative to the original (daily) data. Again, CoalCreekLower serves as the example.

Code
# Overlay original (thick black) vs gap-filled (thin red) daily mean flow for the example site
dat_daily_fill %>% filter(site_name == "CoalCreekLower") %>% select(date, flow_mean, flow_mean_filled) %>% dygraph() %>% dyRangeSelector() %>% dySeries("flow_mean", strokeWidth = 5, color = "black") %>% dySeries("flow_mean_filled", strokeWidth = 1, color = "red") %>% dyAxis("y", label = "Mean daily flow (cfs)") %>% dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

2.6 Calculate yield

Code
# convert cfs and basin area to metric
cfs_to_cms <- 0.02831683199881   # cubic feet per second -> cubic meters per second
sqmi_to_sqkm <- 2.58999          # square miles -> square kilometers
dat_daily_fill <- dat_daily_fill %>% mutate(flow_mean_cms = flow_mean * cfs_to_cms, 
                                            flow_mean_filled_cms = flow_mean_filled * cfs_to_cms, 
                                            area_sqkm = area_sqmi * sqmi_to_sqkm)

# sites to iterate over
sites <- unique(dat_daily_fill$site_name)

# site-specific basin area in square km (one value per gaged site)
basinarea <- dat_daily_fill %>% filter(!is.na(site_id)) %>% group_by(site_name) %>% summarize(area_sqkm = unique(area_sqkm))

# calculate daily yield (mm) from both raw and gap-filled flow, joined by site/date
yield_list <- vector("list", length(sites))   # preallocate
for (i in seq_along(sites)) {
  d <- dat_daily_fill %>% filter(site_name == sites[i])
  ba <- as.numeric(unlist(basinarea %>% filter(site_name == sites[i]) %>% select(area_sqkm)))
  yield_list[[i]] <- add_daily_yield(data = d, values = flow_mean_cms, basin_area = ba) %>%
    left_join(add_daily_yield(data = d, values = flow_mean_filled_cms, basin_area = ba) %>%
                rename(Yield_filled_mm = Yield_mm))
}
dat_daily_fill_wyield <- bind_rows(yield_list)

2.7 Calculate 7-day means

Code
# 7-day centered rolling means of flow, temperature, and yield per site.
# across() with .names = "{.col}_7" replaces five near-identical rollapply()
# calls; edges of each series are padded with NA (fill = NA).
dat_daily_fill_wyield <- dat_daily_fill_wyield %>%
  group_by(site_name) %>%
  mutate(across(c(flow_mean, flow_mean_filled, tempc_mean, Yield_mm, Yield_filled_mm),
                ~ rollapply(.x, FUN = mean, width = 7, align = "center", fill = NA),
                .names = "{.col}_7")) %>%
  ungroup() %>%
  filter(!is.na(site_id))   # drop rows from sites lacking a site_id

2.8 View daily data

View daily time series data by sub-basin (little and medium g’s only)

2.9 Write out and read data

Code
# write out
# Persist the daily/weekly product, then read it straight back so downstream
# code works from the on-disk version. NOTE: this reuses the name `dat_daily`,
# replacing the earlier daily-summary object with the full daily/weekly table
# (column types re-guessed by read_csv).
write_csv(dat_daily_fill_wyield, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv")
dat_daily <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv")

2.10 View data availability

Code
# summarize data availability: per basin/date/designation, count sites with
# non-missing daily flow and temperature; reshape long so each row is one
# (designation x variable) series. Site "MRN" is excluded. Zero counts are
# converted to NA (and dropped) so they plot as gaps rather than zeros.
dat_summ <- dat_daily %>% filter(site_id != "MRN") %>%
  group_by(basin, date, designation) %>% summarize(numflow = sum(!is.na(flow_mean)), numtemp = sum(!is.na(tempc_mean))) %>% 
  # pivot_longer() replaces the superseded gather()
  pivot_longer(numflow:numtemp, names_to = "type", values_to = "avail") %>%
  mutate(type2 = as.factor(paste(designation, type, sep = "_"))) %>% 
  mutate(type3 = as.numeric(type2), avail = na_if(avail, 0)) %>% ungroup() %>% filter(!is.na(avail))
Code
# plot all years
#| fig-cap: "Data availability by basin, all years"
# Availability plot: one vertical tick per date per series (type3 encodes the
# designation x variable combination); color encodes number of sites reporting.
# NOTE(review): geom_errorbarh()'s `size` aesthetic is deprecated in
# ggplot2 >= 3.4 in favor of `linewidth` — left unchanged pending a
# ggplot2 version check.
myplot <- dat_summ %>% ggplot(aes(x = date, y = type3)) + 
  geom_errorbarh(aes(xmax = date, xmin = date, color = avail), size = 0.001) +
  scale_color_continuous(trans = "reverse", na.value = "grey60") +
  scale_y_discrete(limits = c("Big G flow", "Big G temp", "Medium g flow", "Medium g temp", "Little g flow", "Little g temp")) + 
  labs(colour = "Number \nof sites") + ylab("") + xlab("Date") + 
  facet_wrap(~ basin, nrow = 4,
             labeller = as_labeller(c(`West Brook` = "West Brook: G = South River Conway (NWIS)", 
                                      `Staunton River` = "Staunton River: G = Staunton River 10 (UVA)",
                                      `Paine Run` = "Paine Run: G = Paine Run 10 (UVA)",
                                      `Piney River` = "Piney River: G = Piney River 10 (UVA)",
                                      `Snake River` = "Snake River: G = Pacific Creek Moran (NWIS)",
                                      `Shields River` = "Shields River: G = Shields River Livingston (NWIS)",
                                      `Flathead` = "NF Flathead River: G = NF Flathead (NWIS)",
                                      `Donner Blitzen` = "Donner Blitzen River: G = DB Frenchglen (NWIS)",
                                      `Duck Creek` = "Duck Creek: G = Shields River Livingston (NWIS)")))
myplot

Code
# jpeg("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Data Availability/DataAvailability_byBasin.jpg", height = 6, width = 12, units = "in", res = 500)
# myplot
# dev.off()
Code
# plot recent years
#| fig-cap: "Data availability by basin, recent years"
# jpeg("./Data Availability/DataAvailability_byBasin_recent.jpg", height = 6, width = 12, units = "in", res = 500)
# Same availability plot as above, restricted to water years 2019+
# (dates on/after 2018-10-01). See the deprecation note on `size` vs
# `linewidth` for geom_errorbarh() in recent ggplot2 releases.
myplot <- dat_summ %>% filter(date >= "2018-10-01") %>% ggplot(aes(x = date, y = type3)) + 
  geom_errorbarh(aes(xmax = date, xmin = date, color = avail), size = 0.001) +
  scale_color_continuous(trans = "reverse", na.value = "grey60") +
  scale_y_discrete(limits = c("Big G flow", "Big G temp", "Medium g flow", "Medium g temp", "Little g flow", "Little g temp")) + 
  labs(colour = "Number \nof sites") + ylab("") + xlab("Date") + 
  facet_wrap(~ basin, nrow = 4,
             labeller = as_labeller(c(`West Brook` = "West Brook: G = South River Conway (NWIS)", 
                                      `Staunton River` = "Staunton River: G = Staunton River 10 (UVA)",
                                      `Paine Run` = "Paine Run: G = Paine Run 10 (UVA)",
                                      `Piney River` = "Piney River: G = Piney River 10 (UVA)",
                                      `Snake River` = "Snake River: G = Pacific Creek Moran (NWIS)",
                                      `Shields River` = "Shields River: G = Shields River Livingston (NWIS)",
                                      `Flathead` = "NF Flathead River: G = NF Flathead (NWIS)",
                                      `Donner Blitzen` = "Donner Blitzen River: G = DB Frenchglen (NWIS)",
                                      `Duck Creek` = "Duck Creek: G = Shields River Livingston (NWIS)")))
myplot

2.11 Compare co-located gages

2.11.1 Compare synchronous gages

Compare streamflow data from co-located EcoDrought and NWIS gages with overlapping periods of record

Code
# Compare 7-day mean flow at co-located little-g (EcoDrought) and medium-g
# (NWIS) gages with overlapping periods of record.
# Helper: join the two sites' 7-day mean flows by date and scatter them on a
# log-log scale with a red 1:1 reference line.
compare_gages <- function(little_site, medium_site) {
  dat_daily %>%
    filter(site_name == little_site) %>%
    select(date, flow_mean_7) %>%
    rename(little = flow_mean_7) %>%
    left_join(dat_daily %>%
                filter(site_name == medium_site) %>%
                select(date, flow_mean_7) %>%
                rename(medium = flow_mean_7)) %>%
    ggplot() +
    geom_point(aes(x = log(medium), y = log(little))) +
    geom_abline(intercept = 0, slope = 1, color = "red")
}

# WEST BROOK
p1 <- compare_gages("West Brook 0", "West Brook NWIS")
# BUG FIX: the NWIS site name was previously misspelled "Avery Broook NWIS"
# (triple 'o'), which matched no rows in the join — TODO confirm the
# corrected spelling against the site table.
p2 <- compare_gages("Avery Brook", "Avery Brook NWIS")

# FLATHEAD
p3 <- compare_gages("BigCreekMiddle", "Big Creek NWIS")

# jpeg("./Data Availability/LittleMedium_Co-Located_Gages.jpg", height = 6, width = 6, units = "in", res = 500)
ggarrange(p1, p2, p3, ncol = 2, nrow = 2, labels = c("West Brook 0", "Avery Brook", "Big Creek (Flathead)"))

Streamflow measured at little g gages (EcoDrought) as a function of streamflow measured at medium g gages (NWIS), on a log-scale. Red line denotes 1:1
Code
# dev.off()

2.11.2 Compare asynchronous gages

For Spread Creek and Shields River, compare data from co-located EcoDrought and NWIS gages with NON-overlapping periods of record. Note that streamflow from NWIS gages is roughly one order of magnitude greater than what is measured at EcoD gages.